library(censusapi)
library(tidycensus)
library(tidyverse)
library(sf)
library(geojsonsf)
library(mapview)
library(dplyr)
library(plotly)
library(tigris)
library(readxl)
library(leaflet)
library(RColorBrewer)
library(sp)
library(usmap)
mapviewOptions(
basemaps = "CartoDB.Positron",
#vector.palette = colorRampPalette(cols)
)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="0c313bd613a7281ae62c2fbb004d156d647e9c94")
projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
# Obtain the census variable lookup data
# acs_vars <-
# listCensusMetadata(
# name = "2018/acs/acs5",
# type = "variables"
# )
# Save an .rds version
# saveRDS(acs_vars, file = "/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/sj_outreach/censusData2018_acs_acs5.rds")
# Obtain the saved census data
acs_vars = readRDS("/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/sj_outreach/censusData2018_acs_acs5.rds")
# Get Santa Clara block groups
sc_blockgroups <-
block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84')
# Get San Jose geometry
sj_geom <- places("CA", cb = F, progress_bar = FALSE) %>%
filter(NAME == "San Jose") %>%
st_transform('+proj=longlat +datum=WGS84')
# Filter to Santa Clara block groups within San Jose geometry
sj_blockgroups <- sc_blockgroups[st_contains(sj_geom, sc_blockgroups %>% st_centroid())[[1]],]
These vulnerability maps are intended to determine the San Jose block groups with limited access to information, determined by lack of computers and lack of internet within a household; as well as the block groups in need of resources, determined by a measure of household median income.
sc_internet <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B28002)"
) %>%
mutate(
blockgroup = paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
-blockgroup) %>%
mutate(
label = acs_vars$label[match(variable,acs_vars$name)]
) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>%
filter(is.na(subscription) | subscription == "No Internet access") %>%
mutate(
subscription = replace_na(subscription, "total_num")
) %>%
dplyr::select(blockgroup, estimate, subscription) %>%
spread(key = subscription, value = estimate) %>%
mutate(
percent_no_internet = (`No Internet access`*100 / total_num) %>% round(1)
)
sj_blockgroups_internet <-
sc_internet %>%
filter(blockgroup %in% sj_blockgroups$GEOID) %>%
left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>%
st_as_sf() %>%
st_set_crs(4326) %>%
st_transform(projection)
total_no_internet_sj = sum(sj_blockgroups_internet$`No Internet access`)
total_sj <- sum(sj_blockgroups_internet$total_num)
perc_no_internet_sj <- total_no_internet_sj*100/total_sj
print(perc_no_internet_sj)
## [1] 8.052816
sj_blockgroups_full <-
block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84') %>%
filter(GEOID %in% sj_blockgroups$GEOID)
mapview(sj_blockgroups_internet %>% dplyr::select(percent_no_internet), layer.name = "Percentage of households without internet access")
sc_computer <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B28003)"
) %>%
mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "computer", "internet"), sep = "!!") %>%
filter(is.na(computer) | computer == "No computer") %>%
mutate(computer = replace_na(computer, "total_num")) %>%
dplyr::select(blockgroup, estimate, computer) %>%
spread(key = computer, value = estimate) %>%
mutate(percent_no_computer = (`No computer`*100 / total_num) %>% round(1))
sj_blockgroups_computer <-
sc_computer %>%
filter(blockgroup %in% sj_blockgroups$GEOID) %>%
left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>%
st_as_sf() %>%
st_set_crs(4326) %>%
st_transform(projection)
total_no_computer_sj = sum(sj_blockgroups_computer$`No computer`)
total_sj_comp <- sum(sj_blockgroups_computer$total_num)
perc_no_computer_sj <- total_no_computer_sj*100/total_sj_comp
print(perc_no_computer_sj)
## [1] 5.840027
mapview(sj_blockgroups_computer %>% dplyr::select(percent_no_computer), layer.name = "Percentage of households without a computer")
Using area median income (AMI) is a more appropriate way to measure low-income status, as the cost of living is substantially higher than the federal average in San Jose.
sj_median_income_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "B19013_001E"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
rename(
Median_Income = B19013_001E
) %>%
filter(!is.na(Median_Income)) %>%
left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>%
na.omit()
sj_ami_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B19001)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
group_by(blockgroup) %>%
summarize(
Total = B19001_001E,
`Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
#sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
`Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E)
) %>%
mutate(
`% under 75,000` = `Under 75,000` / Total * 100,
`% under 100,000` = `Under 100,000` / Total * 100
) %>%
left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)
)
sj_ami_by_block <-
sj_ami_by_block %>%
st_as_sf()
mapview(sj_ami_by_block, zcol = "% under 100,000")
# m1 <- mapview(sj_blockgroups_internet %>% dplyr::select(percent_no_internet), layer.name = "% Households Without Internet")
#
# m2 <- mapview(sj_blockgroups_computer %>% dplyr::select(percent_no_computer), layer.name = "% Households Without Computer")
#
# m3 <- mapview(sj_ami_by_block, zcol = "% under 100,000", layer.name = "% Households Under 80% AMI")
#
# if (interactive()) {
# library(plainview)
# m1 + m2
# }
#
# mapview(sj_blockgroups_internet %>% dplyr::select(percent_no_internet), layer.name = "Percentage of households without internet access") + mapview(sj_blockgroups_computer %>% dplyr::select(percent_no_computer), layer.name = "Percentage of households without a computer")
This segment explores different outreach options to target the most vulnerable block groups identified, starting with exploring the language breakdown of different block groups. The first outreach option is placing informational posters in highly visited essential businesses. The second outreach option is placing the posters on street poles. The final outreach option is placing the posters on bus routes.
sj_lang_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B16004)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
left_join(acs_vars, by = c("variable" = "name")) %>%
mutate(
tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
) %>%
filter(tier %in% c('Speak English "not well"',
'Speak English "not at all"',
'Total', 'Speak Spanish',
'Speak Asian and Pacific Island languages')) %>%
group_by(blockgroup, tier) %>%
summarise(
estimate1 = sum(estimate)
) %>%
spread(
key = "tier",
value = "estimate1"
) %>%
mutate(
`% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
`% speaking spanish` = (`Speak Spanish`/ Total) * 100,
`% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
) %>%
left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
mutate(
log_perc = log(`% speaking english < well`)
) %>%
filter(log_perc > 0) %>%
st_as_sf()
mapview(sj_lang_by_block)
A block with high % of no internet access:
example <-
sj_blockgroups_internet[29,]
mapview(example)
# This creates file paths for the SafeGraph folder and COVID-19 GitHub folder.
sg_path <- "/Users/armellecoutant/pCloud Drive/SFBI/Restricted Data Library/Safegraph/"
github_path <- "/Users/armellecoutant/Documents/GitHub/covid19/"
# This sets URLs for Point of Interest (POI) location data.
poi_url_Mar2020 <-
paste0(sg_path, "/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/ca_poi_Mar20.rds")
poi_url_Apr2020 <-
paste0(sg_path, "/core/2020/04/CoreApr2020Release-CORE_POI-2020_03-2020-04-07/ca_poi_Apr20.rds")
poi_url_2019 <-
paste0(sg_path,'covid19analysis/ca_poi.rds')
Can be associated with grocery store locations to determine which grocery stores to target for outreach.
poi_caApr20 <- readRDS (poi_url_Apr2020)
poi_sj <-
poi_caApr20 %>%
filter(city == "San Jose") %>%
filter(naics_code == "445110")
coordinates(poi_sj) <- c("longitude", "latitude")
proj4string(poi_sj) <- CRS("+init=epsg:4326")
mapview(poi_sj)
# shapes <-
# read_csv("/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/digital_inclusion/gtfs_vta/shapes.txt")
This script includes code adapted from the following sources: - simone_nfo_internet.Rmd - Dem_analysis_plotly.Rmd